library(rgdal)
library(dplyr)
library(rgeos)
library(readr)
library(xtable)
library(sp)


# Find border census tracts the same way we found border precincts (the two are roughly the same size);
# then subset the tracts to just those that fall within a random selection of 10
# border pairings; finally, merge in census data and evaluate demographic balance across a variety of demographics

# prep census tracts so they carry county names instead of county numbers
tractshp <- readOGR(dsn = "../shapes/tl_2017_37_tract", layer = "tl_2017_37_tract")
tractshp <- spTransform(x = tractshp, CRSobj = CRS("+proj=longlat +datum=WGS84"))

library(reshape2)
library(stringr)

# Build a lookup from county code to upper-case county name.
# The county name is whatever precedes " County," in GEO.display.label.
county_names <- read.csv(file = "race_eth.csv")
name_parts <- colsplit(county_names$GEO.display.label, " County,", names = c("a", "b"))

# Strip ONLY the leading state prefix "37" from the code. An unanchored
# gsub("37", "") would also delete a "37" occurring inside the county part
# (e.g. 37037 -> "0", 37137 -> "1"), so those counties would never match
# COUNTYFP in the merge below.
# assumes GEO.id2 is the state prefix followed by the 3-digit county code — confirm
county_names <- county_names %>%
  mutate(
    COUNTY_NAM = toupper(name_parts$a),
    GEO.id2 = sub("^37", "", GEO.id2)
  ) %>%
  select(GEO.id2, COUNTY_NAM)

# attach the county name to every tract via its county code
tractshp <- merge(x = tractshp, y = county_names, by.x = "COUNTYFP", by.y = "GEO.id2")

# get border file (county polygons), transformed to the longlat/WGS84 CRS
countyshp <- readOGR(dsn = "../shapes/CountyBoundary", layer = "CountyBoundary")
countyshp <- spTransform(x = countyshp, CRSobj = CRS("+proj=longlat +datum=WGS84"))

# county names become column names below, so replace spaces with underscores
countyshp$NAME <- gsub(" ", "_", countyshp$NAME)
tractshp$COUNTY_NAM <- gsub(" ", "_", tractshp$COUNTY_NAM)

# intersect: one row per tract, one logical column per county, TRUE where the
# tract geometry intersects that county's geometry
intersects <- data.frame(gIntersects(countyshp, tractshp, byid = TRUE))
names(intersects) <- as.character(countyshp$NAME)

# Name the identifier columns explicitly rather than regex-stripping the
# "tractshp$" prefix from the names cbind() derives from its argument
# expressions; the resulting data frame is the same.
border_tracts <- data.frame(
  COUNTY_NAM = tractshp$COUNTY_NAM,
  GEOID = tractshp$GEOID,
  intersects,
  check.names = FALSE
)

## Now identify the tracts that fall within the random selection of border pairs

# Single source of truth for the ten sampled county pairs. `label` is the
# value stored in border_pair. Row order matters: a tract that qualifies for
# more than one pair (CUMBERLAND and IREDELL each appear in two pairs) is
# assigned to the first pair listed, which matches the order of the original
# nested ifelse() chain.
sampled_pairs <- data.frame(
  county_a = c("CASWELL", "LENOIR", "CUMBERLAND", "CHEROKEE", "IREDELL",
               "CUMBERLAND", "JOHNSTON", "DAVIDSON", "IREDELL", "BLADEN"),
  county_b = c("ROCKINGHAM", "PITT", "HARNETT", "MACON", "WILKES",
               "HOKE", "NASH", "FORSYTH", "YADKIN", "COLUMBUS"),
  label = c("CASWELL_ROCKINGHAM", "LENOIR_PITT", "CUMBERLAND_HARNETT",
            "CHEROKEE_MACON", "WILKES_IREDELL", "CUMBERLAND_HOKE",
            "NASH_JOHNSTON", "FORSYTH_DAVIDSON", "YADKIN_IREDELL",
            "COLUMBUS_BLADEN"),
  stringsAsFactors = FALSE
)

# Fail loudly if an expected county column is absent; `[[` would otherwise
# return NULL and silently match nothing.
missing_counties <- setdiff(
  c(sampled_pairs$county_a, sampled_pairs$county_b),
  names(border_tracts)
)
if (length(missing_counties) > 0) {
  stop("County columns missing from border_tracts: ",
       paste(missing_counties, collapse = ", "), call. = FALSE)
}

# A tract belongs to pair (a, b) when it lies in county b and intersects
# county a, or lies in county a and intersects county b.
assigned_pair <- rep(NA_character_, nrow(border_tracts))
for (i in seq_len(nrow(sampled_pairs))) {
  a <- sampled_pairs$county_a[i]
  b <- sampled_pairs$county_b[i]
  on_border <- (border_tracts[[a]] & border_tracts$COUNTY_NAM == b) |
    (border_tracts[[b]] & border_tracts$COUNTY_NAM == a)
  # which() drops NA (tracts with no county name), so they stay unassigned
  first_hit <- which(on_border & is.na(assigned_pair))
  assigned_pair[first_hit] <- sampled_pairs$label[i]
}

# make a variable to indicate which pairing each tract belongs to, and keep
# only the tracts that belong to one of the sampled pairs
border_tracts_sample <- border_tracts %>%
  mutate(border_pair = assigned_pair) %>%
  filter(!is.na(border_pair))

table(border_tracts_sample$border_pair)
border_tracts_sample <- border_tracts_sample %>% select(c(GEOID, COUNTY_NAM, border_pair))

# now merge in demographic data...

# employment and poverty
# The data rows are read with the first two lines of the file skipped and no
# header; the column names are then copied from a second, default read of the
# same file.
unemp_pov <- read.csv(file = "ACS_16_5YR_S2301_with_ann.csv", skip = 2, header = FALSE)
unemp_pov_head <- read.csv(file = "ACS_16_5YR_S2301_with_ann.csv")
colnames(unemp_pov) <- colnames(unemp_pov_head)

unemp_pov <- unemp_pov[, c("GEO.id2", "HC02_EST_VC01", "HC01_EST_VC36",
                           "HC01_EST_VC37", "GEO.display.label")]

# coerce the estimate columns to numeric; values that cannot be parsed become NA
unemp_pov_estimates <- c("HC02_EST_VC01", "HC01_EST_VC36", "HC01_EST_VC37")
unemp_pov[unemp_pov_estimates] <- lapply(
  unemp_pov[unemp_pov_estimates],
  function(v) as.numeric(as.character(v))
)

# percent_employed rescales HC02_EST_VC01 by 0.01; percent_poverty is
# VC36 as a share of VC36 + VC37.
# NOTE(review): confirm against the S2301 column metadata that HC02_EST_VC01
# is the employment measure intended — the name percent_employed assumes so.
unemp_pov <- mutate(
  unemp_pov,
  percent_employed = HC02_EST_VC01 * .01,
  percent_poverty = HC01_EST_VC36 / (HC01_EST_VC36 + HC01_EST_VC37)
)

# educational attainment
# Same two-step read as the other ACS tables: data rows without the first two
# lines, column names taken from a default read of the same file.
education <- read.csv(file = "ACS_16_5YR_S1501_with_ann.csv", skip = 2, header = FALSE)
education_head <- read.csv(file = "ACS_16_5YR_S1501_with_ann.csv")
colnames(education) <- colnames(education_head)

education <- education[, c("GEO.id2", "HC02_EST_VC09", "HC02_EST_VC18",
                           "GEO.display.label")]

# coerce the estimate columns to numeric; values that cannot be parsed become NA
education_estimates <- c("HC02_EST_VC09", "HC02_EST_VC18")
education[education_estimates] <- lapply(
  education[education_estimates],
  function(v) as.numeric(as.character(v))
)

# rescale both estimates by 0.01
education <- mutate(
  education,
  percent_lessthanHS = HC02_EST_VC09 * .01,
  percent_BAplus = HC02_EST_VC18 * .01
)

# race and age
# Same two-step read as the other ACS tables: data rows without the first two
# lines, column names taken from a default read of the same file.
race_age <- read.csv(file = "ACS_16_5YR_DP05_with_ann.csv", skip = 2, header = FALSE)
race_age_head <- read.csv(file = "ACS_16_5YR_DP05_with_ann.csv")
colnames(race_age) <- colnames(race_age_head)

race_age <- race_age[, c("GEO.id2", "HC01_VC23", "HC03_VC88", "HC03_VC94",
                         "HC03_VC95", "GEO.display.label")]

# coerce the estimate columns to numeric; values that cannot be parsed become NA
race_age_estimates <- c("HC01_VC23", "HC03_VC88", "HC03_VC94", "HC03_VC95")
race_age[race_age_estimates] <- lapply(
  race_age[race_age_estimates],
  function(v) as.numeric(as.character(v))
)

# rescale the three HC03 estimates by 0.01; HC01_VC23 is kept as-is and
# renamed to median_age later in the script
race_age <- mutate(
  race_age,
  percent_latino = HC03_VC88 * .01,
  percent_white = HC03_VC94 * .01,
  percent_black = HC03_VC95 * .01
)

# merge together to get a data frame of census demographics for all tracts in North Carolina

# Join the three ACS tables on GEO.id2, keep the derived measures plus the
# identifier and label columns, then give the age and id columns friendlier names.
census_demos <- unemp_pov %>%
  left_join(education, by = "GEO.id2") %>%
  left_join(race_age, by = "GEO.id2") %>%
  select(
    percent_employed, percent_poverty, percent_lessthanHS,
    percent_BAplus, percent_latino, percent_white, percent_black,
    HC01_VC23, GEO.id2, GEO.display.label
  ) %>%
  rename(median_age = HC01_VC23, GEOID = GEO.id2)

head(census_demos)
# merge census data with border_tracts_sample


# convert GEOID to numeric before joining on it
border_tracts_sample$GEOID <- as.numeric(as.character(border_tracts_sample$GEOID))

# keep only sampled border tracts that have census data, then drop any row
# with a missing value
border_tracts_demos <- border_tracts_sample %>%
  inner_join(census_demos, by = "GEOID") %>%
  na.omit()

# balance table
library(dplyr)
library(data.table)

# means for each border pair, compared to means for a randomly drawn census tract to which they are not compared

head(census_demos)

# (GEOID was already converted earlier in the script; repeating it is harmless)
border_tracts_sample$GEOID <- as.numeric(as.character(border_tracts_sample$GEOID))

# All tracts that are NOT in the sampled border pairs: flag rows that found a
# match in border_tracts_sample, keep the unflagged ones, and drop incomplete rows.
border_tracts_demos2 <- census_demos %>%
  left_join(border_tracts_sample, by = "GEOID") %>%
  mutate(border_pair_flag = ifelse(is.na(border_pair), 0, 1)) %>%
  filter(border_pair_flag == 0) %>%
  select(c(-COUNTY_NAM, -border_pair)) %>%
  na.omit()

# draw 10 comparison tracts at random (fixed seed)
set.seed(10)
random_rows <- sample(nrow(border_tracts_demos2), 10)
tracts_to_compare <- border_tracts_demos2[random_rows, ]

# Pull the county name out of the label: take what follows the first comma,
# then what precedes "County,".
label_tail <- colsplit(tracts_to_compare$GEO.display.label, ",", names = c("a", "b"))
split <- colsplit(label_tail$b, "County,", names = c("a", "b"))
COUNTY_NAM <- split$a

# put the upper-cased county name in the first column
tracts_to_compare <- cbind(COUNTY_NAM, tracts_to_compare) %>%
  mutate(COUNTY_NAM = toupper(split$a))


# Row labels shared by every balance table, in the column order of the eight
# demographic measures.
demo_row_labels <- c("% Employed", "% Poverty", "% Less than HS", "% College Grad",
                     "% Latino", "% White", "% Black", "Median Age")

# Build the balance table for one border pair.
#
# pair_label:  value of border_pair identifying the pair in border_tracts_demos
# compare_row: row of tracts_to_compare holding the random comparison tract
#
# Returns a data frame with one row per demographic measure and five columns:
# the mean for each of the pair's two counties (in the order group_by() yields
# them), the random tract's values, `MATCHED DIFF` (county 1 - county 2) and
# `RANDOM DIFF` (county 1 - random tract).
build_pair_table <- function(pair_label, compare_row) {
  dat <- border_tracts_demos %>%
    filter(border_pair == pair_label) %>%
    select(-c(GEOID, border_pair, GEO.display.label))

  # per-county means of every remaining column (funs() is deprecated, so the
  # function is passed directly)
  means <- dat %>%
    group_by(COUNTY_NAM) %>%
    summarise_all(mean)

  pair <- data.frame(t(means[, -1]))
  colnames(pair) <- means$COUNTY_NAM

  # The table layout and the differences below assume exactly two counties.
  if (ncol(pair) != 2) {
    stop("Expected exactly two counties for border pair '", pair_label,
         "', found ", ncol(pair), call. = FALSE)
  }

  # columns 2:9 of tracts_to_compare are the eight demographic measures;
  # column 1 is the county name used as the column header
  pair_compare <- data.frame(t(tracts_to_compare[compare_row, 2:9]))
  colnames(pair_compare) <- tracts_to_compare[compare_row, 1]
  pair <- cbind(pair, pair_compare)

  # Base assignment (rather than mutate) so the table still builds if the
  # random tract's county name duplicates one of the pair's column names.
  pair[["MATCHED DIFF"]] <- pair[, 1] - pair[, 2]
  pair[["RANDOM DIFF"]] <- pair[, 1] - pair[, 3]

  rownames(pair) <- demo_row_labels
  pair
}

# one table per sampled pair, each compared against its own random tract
pair1 <- build_pair_table("FORSYTH_DAVIDSON", 1)     # forsyth/davidson
pair2 <- build_pair_table("CASWELL_ROCKINGHAM", 2)   # caswell/rockingham
pair3 <- build_pair_table("CHEROKEE_MACON", 3)       # cherokee/macon
pair4 <- build_pair_table("COLUMBUS_BLADEN", 4)      # columbus/bladen
pair5 <- build_pair_table("CUMBERLAND_HARNETT", 5)   # cumberland/harnett
pair6 <- build_pair_table("CUMBERLAND_HOKE", 6)      # cumberland/hoke
pair7 <- build_pair_table("LENOIR_PITT", 7)          # lenoir/pitt
pair8 <- build_pair_table("NASH_JOHNSTON", 8)        # nash/johnston
pair9 <- build_pair_table("WILKES_IREDELL", 9)       # wilkes/iredell
pair10 <- build_pair_table("YADKIN_IREDELL", 10)     # yadkin/iredell

# write to .tex tables

# Footnote appended after the last row of every table.
pair_table_note <- paste0("\\hline\n\\multicolumn{6}{l}{\\emph{Note: `Matched difference' indicates the percentage point difference between the census tracts }}\\\\\n",
                          "\\multicolumn{6}{l}{\\emph{on the border of county 1 and county 2, included in the analysis.}}\\\\\n",
                          "\\multicolumn{6}{l}{\\emph{`Random difference' indicates the percentage point difference between the census tracts on the border }}\\\\\n",
                          "\\multicolumn{6}{l}{\\emph{of county 1 (included in the analysis) and a tract drawn at random (not included in the analysis).}}\\\\\n",
                          "\\multicolumn{6}{l}{\\emph{}}\\\\\n")

# Write one balance table to appendix_demos_pair<pair_number>.tex.
#
# pair_table:  data frame with five columns (align "lccccc" = row names + 5)
# pair_number: integer used in the LaTeX label and the output file name
write_pair_table <- function(pair_table, pair_number) {
  xt <- xtable(x = pair_table,
               align = "lccccc",
               digits = 3,
               caption = "Comparison of key demographics between paired border census tracts, and tracts drawn at random",
               label = paste0("tab:demos_pair", pair_number))

  # place the note after the final data row
  addtorow <- list()
  addtorow$pos <- list(nrow(xt))
  addtorow$command <- pair_table_note

  # `type` is an argument of print.xtable; passed to xtable() it was ignored
  print(xt,
        type = "latex",
        size = "\\footnotesize",
        format.args = list(big.mark = ","),
        caption.placement = "top",
        file = paste0("appendix_demos_pair", pair_number, ".tex"),
        #include.rownames = FALSE,
        hline.after = c(-1, 0),
        add.to.row = addtorow)
}

pair_tables <- list(pair1, pair2, pair3, pair4, pair5,
                    pair6, pair7, pair8, pair9, pair10)
for (pair_number in seq_along(pair_tables)) {
  write_pair_table(pair_tables[[pair_number]], pair_number)
}


# Deliberate halt: everything below is kept as a record and is not reached
# when the script is run top to bottom.
stop()


################## THIS is how I carried out the random selection ##############
# NOTE(review): no seed is set in this section, so re-running it on its own
# would presumably not reproduce the ten pairs hard-coded above — confirm.


# read county pairs (tab-delimited, quoting disabled, first row is the header)
county_pairs <- read_delim(
  file = "../data/county-early-voting-differences.csv",
  delim = "\t",
  quote = "",
  col_names = TRUE
)

### randomly select 10 county pairs using the county-early-voting-differences file

sampled_pair_rows <- sample(nrow(county_pairs), 10)
pairs_for_demos <- county_pairs[sampled_pair_rows, ]

# the first two columns are the ones used to match the pairs
pairs_to_match <- pairs_for_demos[, 1:2]

print(pairs_to_match)


